/******************************************************************************
Description:

*	This program merges lottery numbers with application data to produce
*	a reshaped match file. It then defines ed opt buckets and global priorities,
*	using this information to perform the DA match.
*
*	----------------------------------------------------------------------------
*	inputs: 		swd_classification_2012_2016_cleaned.csv
*						> SWD program indicators
*					{yyyy}_lottery.csv
*						> student lottery number files
*					20{yy-1}-{yy}_June-Biog_PK-12_Scrambled.csv
*						> biog data for ELL variable, 2015 and 2016
*					20{yy-1}-{yy}_HSAPS_Scrambled.dta (*!*)
*						> applications data
*					EdOpt_Matches.dta
*						> Ed.Opts matches
*					*nyc_match_reshape{yyyy}.dta
*						> match file at studentXprogram level
*					**match_R1_current_forpscore_{yyyy}.txt
*						> match assignments from DA script
*	dependencies:	daaNYC_sims.pl
*						> Perl DA script
*	----------------------------------------------------------------------------
*	intmd. outputs: *nyc_match_reshape{yyyy}.dta
*						> match file at studentXprogram level
*					capacity_R1_current_forpscore_{yyyy}.txt
*						> capacity file for DA
*					choice_file_R1_current_forpscore_{yyyy}.txt
*						> choice file for DA
*					priority_file_R1_current_forpscore_{yyyy}.txt
*						> priority file for DA
*					rsid_file_R1_current_forpscore_{yyyy}.txt
*						> student ID's for DA
*	outputs: 		best_guess_simulated_match_{yyyy}.dta
*						> simulated match per student
*					match_file_for_pscore_with_sim_lotteries{yyyy}_new.dta
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
* 	settings
	* Start from an empty session and switch off paging and tracing
	clear all
	set more off
	set trace off
	set tracedepth 1
	pause on
	* The seed is set once, before the year loop. The random draws that fill in
	* missing lottery ranks for a given year therefore depend on everything that
	* drew random numbers earlier in the same run.
	set seed 1234

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* 	switches
	* Set a switch to 1 to run the corresponding section for every year in the
	* loop. Section II reads the file that Section I saves.
	local reshape 1 //build reshaped match data as input for DA
	local runDA   1 //run DA
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

foreach year of numlist 2012/2014 {

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*  I. Build the reshaped match data
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	if `reshape' == 1 {

		* ----------------------------------------------------------------------
		*  Load data sets
		* ----------------------------------------------------------------------

		* Load and save Students with Disability (SWD) program indicators, to merge in later
		insheet using "${rawdata}/NYCDOE/DisabilityClassification/swd_classification_2012_2016_cleaned.csv", clear c
		tempfile swd
		sa "`swd'"

		* Load and clean student lottery number file
		* Note: the year of the lottery number is the spring year

		* Load data
		insheet using "${rawdata}/NYCDOE/LotteryNumbers/`year'_lottery.csv", names clear

		* Deal with missing (dash) tiebreakers: recode them to a placeholder that
		* is meant to sort after every real tiebreaker, rank by sorted position,
		* then blank out the rank of the dashed records and flag them
		replace tiebreaker = "zzzzzz" if tiebreaker == "-"
		sort tiebreaker
		gen lottery_rank = _n
		replace lottery_rank = . if tiebreaker == "zzzzzz"
		gen dashed_lottery = tiebreaker == "zzzzzz"

		* Save temp file to merge
		tempfile lottery_numbers
		sa "`lottery_numbers'"

		* Two-digit spring year and the year before it, used to build the
		* 20{yy-1}-{yy} file names below
		local digits = `year' - 2000
		local digits_minus = `digits' - 1

		* Load Bio data for English Language Learners (ELL) variable in years 2015 and 2016
		if inlist("`year'","2015","2016") {
			insheet using "${rawdata}/NYCDOE/JuneBiog/20`digits_minus'-`digits'_June-Biog_PK-12_Scrambled.csv", comma names clear
			keep student_id_scram grade_level ell
			destring student_id_scram ell, replace
			keep if grade_level == "08"
			drop grade_level
			tempfile biog
			save "`biog'"
		}

		* Load applications data
		* For some reason, this is the file that MDRD2 uses. Even though it seems to be the exact
		* same file, it causes the rest of the program to break. So, for the time being, we
		* will use this data  (*!*)
		* Note: -use- takes -clear- (not -replace-) to discard the data in memory
		use "${cleandata}/match/HSAPS/20`digits_minus'-`digits'_HSAPS_Scrambled.dta", clear
		format %12.0g student_id_scram

		* ----------------------------------------------------------------------
		*  Merge data sets
		* ----------------------------------------------------------------------

		// The year in the SWD file is the fall year of an academic year.
		// The HSAPS (match file) is labelled by the academic year in which
		// students apply, so e.g. HSAPS 2013-14 means they apply in that school year
		// to attend 9th grade at a school in the 2014-15 academic year.
		// The loop year is used directly as the SWD merge key.
		gen fallyear = `year'

		* Merge in SWD program flag
		* (keeps every application record; SWD rows with no application are dropped)
		merge m:1 fallyear sms_group using "`swd'", keep(1 3) nogen

		// change related to (*!*), above
		//destring(student_id_scram), replace

		* Merge in student lotto numbers
		* (lottery_merge records which applicants had a lottery record)
		merge 1:1 student_id_scram using  "`lottery_numbers'", gen(lottery_merge) keep( 1 3 )

		* ----------------------------------------------------------------------
		*  Simulating Lottery Numbers
		* ----------------------------------------------------------------------

		* Rescale lottery rank to [0,1] by dividing by the largest observed rank
		su lottery_rank
		gen double lottery_rank_mod_new = lottery_rank / r(max)
		drop lottery_rank
		ren lottery_rank_mod_new lottery_rank
		gen lottery_rank_missing = lottery_rank == .

		* Draw random number if missing lottery rank.
		* The data are first put in a unique, deterministic order so that, for a
		* given random-number state, each applicant always receives the same draw.
		* The ID is spelled out in full: the short name -stu- does not exist until
		* the rename after the reshape, so it only resolved via abbreviation.
		isid student_id_scram
		sort student_id_scram
		gen double shuffle = runiform(0,1) if lottery_rank == . // Make sure it is double
		replace lottery_rank = shuffle if lottery_rank == .

		* Assert that there aren't applicants who have the same lottery number
		* (NYC vs. our draw)
		duplicates tag lottery_rank, gen(lottery_dups)
		su lottery_dups
		assert r(max) == 0

		* Merge ELL in years 2015 and 2016
		if inlist("`year'","2015","2016") {
			merge 1:1 student_id_scram using "`biog'", keep(1 3)
		}

		* ----------------------------------------------------------------------
		*  Clean application variables
		* ----------------------------------------------------------------------

		* Z-score math and ELA scores within each test (grouped by test name)
		foreach subj in math ela {
			destring `subj'_score, replace
			egen mean`subj' = mean(`subj'_score), by(`subj'_test_name)
			egen sd`subj' = sd(`subj'_score), by(`subj'_test_name)
			gen bl_ss_`subj' = (`subj'_score - mean`subj') / sd`subj'
		}

		* From 2015 on, align variable names with the earlier files and create
		* the top-2-percent flag as all zeros
		if `year' >= 2015 {
			rename r1_match specialized_hs_program_offered
			gen top_2_percent = 0
		}

		* Keep only the variables used downstream
		keep student_id_scram grade_level sms_group borough status ///
			final_disposition* r1* ///
			lottery_rank lottery_rank_missing dashed_lottery ///
			specialized_hs_program_offered shsat_offer any_lag_offer ///
			top_2_percent opt_out_status swd ell reading_category ///
			ela_score math_score ela_test_name math_test_name ///
			bl_ss_math bl_ss_ela

		* ----------------------------------------------------------------------
		*  Adding Ed.Opts Bucket info
		* ----------------------------------------------------------------------

		* Merging in which Ed.Opts bucket were received (see MDRD2 Data Appendix for more info)
		gen year = `year'

		preserve
			use "${cleandata}match/EdoptMatches/EdOpt_Matches.dta", clear

			* Generate spring year variable from the last two characters of session_yr
			gen year = 2000 + real(substr( session_yr , length( session_yr ) -1, 2))

			destring student_id_scram, replace

			* Only keeping Ed.Opts match from first round.
			* This filter is applied to the Ed.Opts file BEFORE merging. Dropping
			* non-round-1 rows after the merge would also delete the applicant's
			* whole application record whenever their only Ed.Opts match rows
			* come from a later round.
			keep if round == "1"

			tempfile edopts
			save "`edopts'"
		restore

		* Applicants without a round-1 Ed.Opts record stay in the data with
		* edopts_merge == 1 and empty Ed.Opts variables
		merge 1:m student_id_scram year using "`edopts'", ///
			keep(1 3) gen(edopts_merge) keepusing(edopt_match  round rank_vs_lottery category)

		* Harmonize the labels of the random/selected indicator
		replace rank_vs_lottery = "RANDOM" if rank_vs_lottery == "LOTTERY"
		replace rank_vs_lottery = "SELECTED" if rank_vs_lottery == "RANKING"

		* ----------------------------------------------------------------------
		*  Reshape
		* ----------------------------------------------------------------------

		// Reshape to be unique at the level of student X program applied to.
		// We focus only on round 1 of the assignment. This is because
		// we have insufficient information to replicate round 2.

		reshape long r1match r1programcode r1programtype r1rank r1prioritygroup r1eligibility  ///
					 , i(student_id_scram) j(choice)

		* Harmonize variable names and correct types.
		* The student ID is written out in full so the rename does not depend on
		* variable-name abbreviation being enabled and unambiguous.
		rename (student_id_scram r1programcode r1prioritygroup r1rank r1eligibility r1programtype grade_level specialized_hs_program_offered) ///
				(stu prg priority rank eligibility prg_type grade prg_offered)

		la var stu 			"Student ID Scrambled"
		la var prg 			"Program Code of program applied to"
		la var prg_offered 	"Code of Program Offered in first round"
		la var priority 		"Student's priority in program"
		la var rank 			"Student's rank in program"

		// change related to (*!*) above
		// destring priority rank eligibility, replace

		* ----------------------------------------------------------------------
		*  Save
		* ----------------------------------------------------------------------
		* Intermediate file read back by Section II
		save "${cleandata}/nyc_match_reshape`year'.dta", replace
		
	}

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*  II. Prep and run DA
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	if `runDA' == 1	 {

		* -------------------------------------------------------------------------------------------------
		*  Restrict sample to applications that can be used in DA (eligible applicants without disability)
		* -------------------------------------------------------------------------------------------------

		* Load the student X program file built in Section I
		use "${cleandata}/nyc_match_reshape`year'.dta", clear

		* Get round 1 eligibility
		* (from 2015 on it is stored as "Y"/other, so recode it to 1/0)
		if `year' >= 2015 {
			gen temp = eligibility == "Y"
			drop eligibility
			ren temp eligibility
		}

		* Drop ineligible applicants
		drop if eligibility != 1

		* Drop applications to SWD programs.
		* note that the applicants who go through the SWD process change by year.
		drop if swd == 1

		// Note: At Ed.Opts, the top two percent performers are guaranteed seats if
		// they rank an Ed.Opts first.
		// We're thus giving top two percent applicants a higher priority at that program.
		// The info about the top 2 percent is found in the bucket info.
		// For years after 2015 (inclusive) there seems to be no top two percent info.

		* Top-2% applicants get a priority one below the smallest observed value
		* at their first-choice Ed.Opts program
		gen choice_to_change = (choice ==  1 & category == "2%" & prg_type == "Ed. opt." )
		su priority
		replace priority = `r(min)' - 1  if choice_to_change == 1

		* Store locals with total sample size, and sample size with missing lotto number
		unique stu
		local total_applicants `r(sum)'

		* Section I fills every missing lottery_rank with a random draw, so
		* lottery_rank is never missing here. The applicants who originally
		* lacked a number are identified by the lottery_rank_missing flag.
		unique stu if lottery_rank_missing == 1
		local applicants_missing_lottery `r(sum)'

		* ----------------------------------------------------------------------------
		*  Generate offer vars
		* ----------------------------------------------------------------------------

		* offer (student X program): the listed program is the one recorded as
		* offered in round 1
		gen offer = prg_offered != "" & prg_offered == prg

		* ever_offer (student): any listed program was offered
		egen ever_offer = max(offer), by(stu)

		* higher_offer (student X program): an offer exists at a choice listed
		* earlier by the same student. The running sum of offers minus the
		* current row's offer counts offers strictly above the current choice.
		sort stu choice
		by stu : gen higher_offer = (sum(offer) - offer) > 0

		* total_offers (program): number of actual offers made by the program
		bys prg: egen total_offers = total(offer)


		* Keep the total number of offers for the consistency check made after
		* the Ed.Opts expansion
		su offer
		global orig_offer_sum = `r(sum)'

		* ----------------------------------------------------------------------------
		* Ed.Opts (See MDRD2 Data appendix for detailed explanation of assignment rules)
		* ----------------------------------------------------------------------------

		// The next piece is only for DA. Ed.Opts programs are split into
		// low, middle, and high buckets based on applicants' performance in a reading test.
		// The category_code variable contains information about the group an
		// applicant is in (high, middle, or low), but even if an applicant is in
		// e.g. the high group, they automatically also apply for middle and low
		// seats in the order which is coded below.
		// Note also that Ed.Opts programs split into random and selected sub-programs,
		// so each Ed.Opts program actually has 6 sub-programs we need to consider:
		// (high, mid, low) X (random, selected).


		* a. Imputing which bucket an applicant got if they got an offer from Ed.Opts

			* If an applicant got an offer from Ed.Opts, and has an empty category bucket,
			* we will replace it with their reading category
			* (edopt_offer is a student-level flag: 1 on every row of a student
			* who has an offer at any Ed.Opts program)
			bys stu : egen edopt_offer = max(offer * (prg_type == "Ed. opt."))

			* Change category
			replace category = reading_category if category == "" & edopt_offer == 1

			* Change whether random or selected bucket (lottery or running variable),
			* setting them all to random if missing
			replace  rank_vs_lottery = "RANDOM" if  rank_vs_lottery == "" & edopt_offer == 1

			* Getting suffix that captures Ed.Opts bucket (high/mid/low random/select):
			* first letter of the category followed by first letter of RANDOM/SELECTED,
			* e.g. "HR"; top-2% records get the literal suffix "2%"
			gen edopt_offered_suffix = substr(category, 1, 1) + substr(rank_vs_lottery, 1, 1)  if category != "2%"
			replace edopt_offered_suffix =  "2%"  if category == "2%"

		*  b. Setting-up ed opt buckets

			* Number of offers made to each reading category at Ed.Opts programs
			bys prg reading_category: egen category_offers = total(offer) if prg_type == "Ed. opt."

			* Six copies of every Ed.Opts application, one per "sub-program"
			expand 6 if lower(prg_type) == "ed. opt."

			* bkt_n (1-6) numbers the six "sub-apps" of an Ed.Opts application
			sort stu prg, stable
			by stu prg:  gen bkt_n = _n  if lower(prg_type) == "ed. opt."

			* bkt holds the bucket applied to in each sub-app
			gen bkt  = ""

			// Within each high/mid/low level an applicant always applies to the
			// selected bucket first and the random bucket second. The order of
			// the levels depends on the applicant's own reading category:
			//   MIDDLE: middle, then high, then low
			//   HIGH:   high, then middle, then low
			//   LOW:    low, then high, then middle
			local order_MIDDLE "MS MR HS HR LS LR"
			local order_HIGH   "HS HR MS MR LS LR"
			local order_LOW    "LS LR HS HR MS MR"

			foreach cat in MIDDLE HIGH LOW {
				local n = 0
				foreach b of local order_`cat' {
					local n = `n' + 1
					replace bkt = "`b'" if bkt_n == `n' & reading_category == "`cat'"
				}
			}

			* Top-2% applicants apply to a dedicated "2%" bucket in their first sub-app
			replace bkt = "2%" if bkt_n == 1 & category == "2%"

			* Flags for the kind of bucket applied to: selected vs. random ...
			gen edopt_selected 		= 	inlist(bkt, "HS", "MS", "LS")
			gen edopt_random		= 	inlist(bkt, "HR", "MR", "LR")

			* ... and high vs. middle vs. low
			gen app_high 			=  	inlist(bkt, "HS",  "HR" )
			gen app_middle  		=  	inlist(bkt, "MS", "MR")
			gen app_low  			=  	inlist(bkt,"LS", "LR" )

			* Program ID that distinguishes the Ed.Opts "sub-programs": the program
			* code, plus "_<bucket>" for Ed.Opts applications
			* Note: For Ed.Opts program this id needs to match the id for the expanded selection.
			gen prg_bkt = prg
			replace prg_bkt = prg_bkt + "_" +  bkt  if lower(prg_type) == "ed. opt."

		*  c. Computing Ed.Opts capacities

			* First modify offer variable to take into account that we expanded the Ed.Opts:
			* of the six copies of an offered Ed.Opts application, only the copy whose
			* bucket equals the offered bucket suffix keeps the offer
			gen offer_mod = offer
			replace offer_mod = (offer == 1 & bkt == edopt_offered_suffix) if lower(prg_type) == "ed. opt."

			* Assert that the original offer sum corresponds to the new number of
			* offers after expanding.
			* ($orig_offer_sum was stored before the expansion)
			su offer_mod
			di "$orig_offer_sum `r(sum)'"
			assert $orig_offer_sum  == `r(sum)'

			* Set capacity to number of offers
			* (per program, or per bucket sub-program for Ed.Opts)
			bys prg_bkt: egen capacity = total(offer_mod)

		*  d. Changing choices and priorities for Ed.Opts

			* Position of every application in the student's list once the
			* Ed.Opts "sub-programs" are included
			sort stu choice bkt_n
			by stu: gen choice_augmented = _n

			* Reading category as a number (1 = HIGH, 2 = MIDDLE, 3 = LOW)
			gen category_code = cond(reading_category == "HIGH", 1, ///
								cond(reading_category == "MIDDLE", 2, ///
								cond(reading_category == "LOW", 3, .)))

			* category_mod: priority of the student's reading category at the
			* bucket applied to; 0 for everything that is not Ed.Opts
			gen category_mod = 0 if prg_type != "Ed. opt."

			* Each entry is: bucket letter, then reading categories in priority order
			*   middle buckets: (1) middle, (2) low,    (3) high
			*   low buckets:    (1) low,    (2) middle, (3) high
			*   high buckets:   (1) high,   (2) middle, (3) low
			foreach spec in "M MIDDLE LOW HIGH" "L LOW MIDDLE HIGH" "H HIGH MIDDLE LOW" {
				gettoken b cats : spec
				local p = 0
				foreach cat of local cats {
					local p = `p' + 1
					replace category_mod = `p' if reading_category == "`cat'" & inlist(bkt , "`b'R" , "`b'S")
				}
			}

			* Coarse high/middle/low level of the bucket applied to
			gen bkt_category = "HIGH" 		if inlist(bkt , "HR" , "HS")
			replace bkt_category = "MIDDLE" 	if inlist(bkt , "MR" , "MS")
			replace bkt_category = "LOW" 		if inlist(bkt , "LR" , "LS")

		*  e. Clean up priority variables

			// Modify lottery and rank vars for Ed.Opts applications, because half of Ed.Opts
			// "sub-programs" use lotto only and half use rank only

			* Lottery rank variable
			* (an unmodified double-precision copy of lottery_rank)
			gen double lottery_rank_mod = lottery_rank

			* Screened rank variable

			* (set to a constant 0 in random buckets, so the rank does not order
			* applicants there)
			gen rank_mod = rank
			replace rank_mod = 0 if edopt_random == 1

			* If you have a missing rank for screened schools you get sent to the back,
			* so we replace the priority with a high number
			replace priority = 10000 if rank == . & prg_type  == "Screened"

			* Please note: You actually get a higher priority at Ed.Opts Random Buckets if
			* you rank them higher, so we need to modify the choice var for Ed.Opts

			* (choice_mod is the student's choice number where ranking order matters
			* for priority, and 0 everywhere else)
			gen choice_mod = 0
			replace choice_mod = choice if edopt_random == 1

			* Note: in 2012 actually for limited unscreened and zoned too
			if `year' == 2012 {
				replace choice_mod = choice if prg_type == "Limited Unscreened"
				replace choice_mod = choice if prg_type == "Zoned"
			}

		* ----------------------------------------------------------------------------
		*  Identify global priority
		* ----------------------------------------------------------------------------

		/* We now have two variables denoting priority: the actual priority var "priority",
		 and the variable we created for Ed.Opts priorities "category_mod". Here, we unify
		 these into one single "global" priority variable. Also note, we now have priorities specific to
		 the "augmented" program ID's, which means we have separate priorities for
		 buckets within Ed.Opts programs.*/

		// Generate a rank_mod_alt variable to get the global priority
		// group for Limited Unscreened applications, where rank_mod is only 1 for
		// applicants who attended a fair and . otherwise. We essentially recode it
		// as a priority group. This variable is then constant for other
		// programs.

		* rank_mod_alt: rank_mod for Limited Unscreened applications, 0 elsewhere
		gen rank_mod_alt = 0
		replace rank_mod_alt = rank_mod if inlist(prg_type ,"Limited Unscreened")

		* Generate Global Priority
		preserve
			* One row per distinct priority tuple within each (sub-)program
			keep prg_bkt category_mod priority choice_mod rank_mod_alt
			duplicates drop

			// sorting as we would do to simulate da
			// here we use rank_mod_alt but in practice sorting with rank_mod gives the same result
			sort prg_bkt category_mod priority choice_mod rank_mod_alt
			// number the distinct tuples within program: a single variable that
			// preserves the ordering
			by prg_bkt: gen global_priority = _n
			tempfile prio_new
			sa "`prio_new'"
		restore

		* Merge global priority variable
		merge m:1 prg_bkt category_mod priority choice_mod rank_mod_alt using "`prio_new'", nogen

		* Ensure that it has worked and the order is as intended.
		* Many applicants share the same priority tuple, and -sort- orders tied
		* observations arbitrarily, so comparing observation numbers across two
		* separate sorts is not reliable. Instead: every row must have a global
		* priority, and after sorting on the underlying priority variables the
		* global priority must never decrease within a (sub-)program.
		assert !missing(global_priority)
		sort prg_bkt category_mod priority choice_mod rank_mod_alt
		by prg_bkt: assert global_priority >= global_priority[_n-1] if _n > 1

		* ----------------------------------------------------------------------------
		*  Run DA, get assignments
		* ----------------------------------------------------------------------------

		* First, we need to output the files that will be used as inputs to the DA Perl script

		* Output capacities file: one "<program> <capacity>" row per (sub-)program
		preserve
			keep prg_bkt capacity
			order prg_bkt capacity
			duplicates drop
			outsheet using "${cleandata}da/capacity_R1_current_forpscore_`year'.txt", replace delimiter(" ") nonames
		restore

		* Output choice file: one "<student> <program>" row per application, in
		* the order of the student's augmented choice list
		sort stu choice_augmented
		order stu choice_augmented
		outsheet stu prg_bkt using "${cleandata}da/choice_file_R1_current_forpscore_`year'.txt", replace delimiter(" ") nonames

		* Output priority file: one "<program> <student>" row per application,
		* ordered within program by priority, then rank, then lottery number
		* (stu is the last key, so the order is fully determined)
		sort prg_bkt category_mod priority choice_mod rank_mod lottery_rank_mod stu
		order prg_bkt stu
		outsheet prg_bkt stu using "${cleandata}da/priority_file_R1_current_forpscore_`year'.txt", replace delimiter(" ") nonames

		* Output student ids: one row per student
		preserve
			keep stu
			* The data are at application level here; reduce to distinct students
			duplicates drop
			sort stu
			* Store the copy as double: the default float type cannot hold
			* integers above 16,777,216 exactly, which would corrupt long IDs
			gen double rsid = stu
			format rsid %20.0g
			order stu rsid
			outsheet using "${cleandata}da/rsid_file_R1_current_forpscore_`year'.txt", replace delimiter(" ") nonames
		restore

		* File the DA script writes its assignments to
		local match_file "${cleandata}da/match_R1_current_forpscore_`year'.txt"

		* Remove any assignment file left over from an earlier run, so that a
		* failed Perl call cannot go unnoticed by reading stale results
		capture erase "`match_file'"

		sleep 1000

		// *** RUN DA ***
		// Arguments: capacity file, choice file, priority file, output file.
		// Forward slashes are used in the script path, as everywhere else in
		// this file, so the call does not depend on the operating system.
		shell perl "${ado}/daaNYC_sims.pl" ///
			"${cleandata}da/capacity_R1_current_forpscore_`year'.txt" ///
			"${cleandata}da/choice_file_R1_current_forpscore_`year'.txt" ///
			"${cleandata}da/priority_file_R1_current_forpscore_`year'.txt" ///
			"`match_file'"

		sleep 1000

		* Stop here with a clear error if the DA script produced no output
		confirm file "`match_file'"

		* Read in DA assignment output file, and merge assignments into our data
		preserve
			insheet using "`match_file'", clear  names  delimiter(" ")
			ren student_id stu
			tempfile match
			sa "`match'"
		restore

		merge m:1 stu using "`match'", gen(has_our_match)

		* ----------------------------------------------------------------------------
		*  Clean post-match data
		* ----------------------------------------------------------------------------

		* In the perl script, if an applicant is not matched, they are assigned to their ID.
		* Replace these as missing here
		* (compares the match against a string copy of the student ID)
		tostring stu, gen(temp)
		replace ourmatch = "" if ourmatch == temp
		drop temp

		* Generate stripped_match, which removes the Ed.Opts bucket tags from
		* program names (e.g. H6G3_HS --> H6G3)
		* (keeps the first four characters of the match)
		gen stripped_match = substr(ourmatch, 1, 4)

		* Flag where actual assignment is the same as our DA assignment
			//note that this doesn't require us to get the ed opt bucket right
		* (also equals 1 when both the actual offer and our match are empty)
		gen same = prg_offered == stripped_match

		* Flag our DA assignments
		* (for Ed.Opts this is 1 on every bucket copy of the matched program)
		gen my_offer = prg == stripped_match

        * Output file for simulated offers
        * One row per distinct (stu, ourmatch, ourmatch_noedops) combination.
        * ourmatch_noedops is our match, overwritten by edopt_match wherever
        * edopt_match is non-missing.
        preserve
            gen ourmatch_noedops = ourmatch
            replace ourmatch_noedops = edopt_match if !mi(edopt_match)
            keep stu  ourmatch ourmatch_noedops
            duplicates drop
            save "${cleandata}best_guess_simulated_match_`year'.dta", replace
        restore

		* Determine the kind of program the student was offered *in our match*
		* Build a lookup from matched program code to program type using the rows
		* where the application is our match, then attach it by stripped_match
		preserve
			keep if my_offer == 1
			keep stripped_match prg_type
			duplicates drop
			ren prg_type myoffered_program_type
			tempfile offered_progs
			save "`offered_progs'"
		restore

		merge m:1 stripped_match using "`offered_progs'", gen(prog_match)

		* Get borough of the school from program code
		* (first character of the matched program code)
		gen borough_matchschool = substr(stripped_match,1,1)

		* Generate rank that applies to the school we matched a student to
		* (also carries the choice number of the matched program, as choice_matched)
		preserve
			keep if my_offer == 1
			keep stu rank choice
			duplicates drop
			ren rank rank_matched
			ren choice choice_matched
			tempfile rank_matched
			save "`rank_matched'"
		restore

		merge m:1 stu using "`rank_matched'", nogen

		* Determine the kind of program the student was _actually_ offered
		* Same lookup as above, built from the rows carrying the actual offer.
		* The tempfile name offered_progs is reused for this second lookup.
		preserve
			keep if offer == 1
			keep prg_offered prg_type offer
			duplicates drop
			ren prg_type offered_program_type
			tempfile offered_progs
			save "`offered_progs'"
		restore

		merge m:1 prg_offered using "`offered_progs'", gen(actual_prog_match)

		* This is the output we use to compute the pscores
		* (the actual-offer flag is saved under the name orig_student_offer)
		ren offer orig_student_offer
		sa "${cleandata}match_file_for_pscore_with_sim_lotteries`year'_new.dta" , replace
	}

}
